fetcher: Fix another finalization deadlock
authorColin Walters <walters@verbum.org>
Thu, 8 Sep 2016 16:39:42 +0000 (12:39 -0400)
committerAtomic Bot <atomic-devel@projectatomic.io>
Thu, 8 Sep 2016 20:09:07 +0000 (20:09 +0000)
If the current repo is already up to date (we have no content to
fetch), it's possible for the fetcher to not request any URIs.  So
create and then finalize it quickly.

Finalization involves calling `g_main_loop_quit()` +
`g_thread_wait()`.  However, if `g_main_loop_quit()` is run *before*
`g_main_loop_run()`, we'll deadlock because GMainLoop assumes in
`_run()` to start things.

This is a common trap - ideally, GMainLoop would record if `_quit()`
was called before `_run()` or something, but doing that now would
likely break people who are expecting quit() -> run() to restart.

In general, we've moved in various GLib-consuming apps to an
explicit "main context iteration with termination condition" model;
see `pull_termination_condition()` in the pull code.

This fixes this race condition.

I verified that an assertion in `_finalize` that more than
zero URIs were requested was hit in multiple test cases, and this patch
has survived a while of make check loops.

Closes: https://github.com/ostreedev/ostree/issues/496
Closes: #499
Approved by: jlebon

src/libostree/ostree-fetcher.c

index 09006b71f6966d9571525ad47cf40883604b9f9e..3ddf2389f6c1e8e92ee2a7257a97c5930873bc1e 100644 (file)
@@ -46,7 +46,7 @@ typedef struct {
 
   SoupSession *session;  /* not referenced */
   GMainContext *main_context;
-  GMainLoop *main_loop;
+  volatile gint running;
 
   int tmpdir_dfd;
   char *tmpdir_name;
@@ -144,7 +144,6 @@ thread_closure_unref (ThreadClosure *thread_closure)
       g_assert (thread_closure->session == NULL);
 
       g_clear_pointer (&thread_closure->main_context, g_main_context_unref);
-      g_clear_pointer (&thread_closure->main_loop, g_main_loop_unref);
 
       if (thread_closure->tmpdir_dfd != -1)
         close (thread_closure->tmpdir_dfd);
@@ -503,7 +502,12 @@ ostree_fetcher_session_thread (gpointer data)
     }
   closure->max_outstanding = 3 * max_conns;
 
-  g_main_loop_run (closure->main_loop);
+  /* This model ensures we don't hit a race using g_main_loop_quit();
+   * see also what pull_termination_condition() in ostree-repo-pull.c
+   * is doing.
+   */
+  while (g_atomic_int_get (&closure->running))
+    g_main_context_iteration (closure->main_context, TRUE);
 
   /* Since the ThreadClosure may be finalized from any thread we
    * unreference all data related to the SoupSession ourself to ensure
@@ -567,7 +571,8 @@ _ostree_fetcher_finalize (GObject *object)
   OstreeFetcher *self = OSTREE_FETCHER (object);
 
   /* Terminate the session thread. */
-  g_main_loop_quit (self->thread_closure->main_loop);
+  g_atomic_int_set (&self->thread_closure->running, 0);
+  g_main_context_wakeup (self->thread_closure->main_context);
   if (self->session_thread)
     {
       /* We need to explicitly synchronize to clean up TLS */
@@ -594,7 +599,7 @@ _ostree_fetcher_constructed (GObject *object)
   self->thread_closure = g_slice_new0 (ThreadClosure);
   self->thread_closure->ref_count = 1;
   self->thread_closure->main_context = g_main_context_ref (main_context);
-  self->thread_closure->main_loop = g_main_loop_new (main_context, FALSE);
+  self->thread_closure->running = 1;
   self->thread_closure->tmpdir_dfd = -1;
   self->thread_closure->tmpdir_lock = empty_lockfile;